Brian M. Schilder, Bioinformatician II
# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 4$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] "400"
##
## $resolution
## [1] "0.2"
##
## $resultsPath
## [1] "Results/subsetGenes-protein_coding__subsetCells-400__Resolution-0.2__perplexity-30__nCores-4"
##
## $nCores
## [1] 4
##
## $perplexity
## [1] "30"
load(file.path("scRNAseq_results.RData"))library(Seurat)
library(dplyr)
library(gridExtra)
library(knitr)
library(plotly)
library(ggplot2)
library(viridis)
library(reshape2)
library(shiny)
library(ggrepel)
library(DT)
library(ComplexHeatmap); #BiocManager::install("ComplexHeatmap")
## Install Bioconductor
# if (!requireNamespace("BiocManager"))
# install.packages("BiocManager")
library(biomaRt) # BiocManager::install(c("biomaRt"))
# library(DESeq2) # BiocManager::install(c("DESeq2"))
library(enrichR) #BiocManager::install("enrichR")
library(monocle) #BiocManager::install("monocle")
# BiocManager::install("DelayedMatrixStats")
# BiocManager::install("org.Mm.eg.db")
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
sessionInfo()## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] splines stats4 parallel grid stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] garnett_0.1.1 org.Hs.eg.db_3.7.0 AnnotationDbi_1.44.0
## [4] IRanges_2.16.0 S4Vectors_0.20.1 monocle_2.10.1
## [7] DDRTree_0.1.5 irlba_2.3.3 VGAM_1.0-6
## [10] Biobase_2.42.0 BiocGenerics_0.28.0 enrichR_1.0
## [13] biomaRt_2.38.0 ComplexHeatmap_1.20.0 ggrepel_0.8.0
## [16] reshape2_1.4.3 viridis_0.5.1 viridisLite_0.3.0
## [19] DT_0.5.1 shiny_1.2.0 plotly_4.8.0
## [22] knitr_1.21 gridExtra_2.3 dplyr_0.7.8
## [25] Seurat_2.3.4 Matrix_1.2-15 cowplot_0.9.4
## [28] ggplot2_3.1.0
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-3 backports_1.1.3 circlize_0.4.5
## [4] Hmisc_4.2-0 plyr_1.8.4 igraph_1.2.2
## [7] lazyeval_0.2.1 densityClust_0.3 crosstalk_1.0.0
## [10] fastICA_1.2-1 digest_0.6.18 foreach_1.4.4
## [13] htmltools_0.3.6 lars_1.2 gdata_2.18.0
## [16] memoise_1.1.0 magrittr_1.5 checkmate_1.9.1
## [19] cluster_2.0.7-1 mixtools_1.1.0 ROCR_1.0-7
## [22] limma_3.38.3 matrixStats_0.54.0 docopt_0.6.1
## [25] R.utils_2.7.0 prettyunits_1.0.2 colorspace_1.4-0
## [28] blob_1.1.1 xfun_0.4 sparsesvd_0.1-4
## [31] crayon_1.3.4 RCurl_1.95-4.11 jsonlite_1.6
## [34] bindr_0.1.1 survival_2.43-3 zoo_1.8-4
## [37] iterators_1.0.10 ape_5.2 glue_1.3.0
## [40] gtable_0.2.0 GetoptLong_0.1.7 kernlab_0.9-27
## [43] shape_1.4.4 prabclus_2.2-7 DEoptimR_1.0-8
## [46] scales_1.0.0 pheatmap_1.0.12 mvtnorm_1.0-8
## [49] DBI_1.0.0 bibtex_0.4.2 Rcpp_1.0.0
## [52] metap_1.1 dtw_1.20-1 progress_1.2.0
## [55] xtable_1.8-3 htmlTable_1.13.1 reticulate_1.10
## [58] foreign_0.8-71 bit_1.1-14 proxy_0.4-22
## [61] mclust_5.4.2 SDMTools_1.1-221 Formula_1.2-3
## [64] tsne_0.1-3 htmlwidgets_1.3 httr_1.4.0
## [67] FNN_1.1.2.2 gplots_3.0.1.1 RColorBrewer_1.1-2
## [70] fpc_2.1-11.1 acepack_1.4.1 modeltools_0.2-22
## [73] ica_1.0-2 pkgconfig_2.0.2 XML_3.98-1.17
## [76] R.methodsS3_1.7.1 flexmix_2.3-14 nnet_7.3-12
## [79] tidyselect_0.2.5 labeling_0.3 rlang_0.3.1
## [82] later_0.8.0 munsell_0.5.0 tools_3.5.1
## [85] RSQLite_2.1.1 ggridges_0.5.1 evaluate_0.12
## [88] stringr_1.4.0 yaml_2.2.0 npsurv_0.4-0
## [91] bit64_0.9-7 fitdistrplus_1.0-14 robustbase_0.93-3
## [94] caTools_1.17.1.1 purrr_0.3.0 RANN_2.6.1
## [97] bindrcpp_0.2.2 pbapply_1.4-0 nlme_3.1-137
## [100] mime_0.6 slam_0.1-44 R.oo_1.22.0
## [103] hdf5r_1.0.1 compiler_3.5.1 rstudioapi_0.9.0
## [106] png_0.1-7 lsei_1.2-0 tibble_2.0.1
## [109] stringi_1.2.4 lattice_0.20-38 trimcluster_0.1-2.1
## [112] HSMMSingleCell_1.2.0 pillar_1.3.1 combinat_0.0-8
## [115] Rdpack_0.10-1 lmtest_0.9-36 GlobalOptions_0.1.0
## [118] data.table_1.12.0 bitops_1.0-6 gbRd_0.4-11
## [121] httpuv_1.4.5.1 R6_2.3.0 latticeExtra_0.6-28
## [124] promises_1.0.1 KernSmooth_2.23-15 codetools_0.2-16
## [127] MASS_7.3-51.1 gtools_3.8.1 assertthat_0.2.0
## [130] rjson_0.2.20 withr_2.1.2 qlcMatrix_0.9.7
## [133] hms_0.4.2 diptest_0.75-7 doSNOW_1.0.16
## [136] rpart_4.1-13 tidyr_0.8.2 class_7.3-15
## [139] rmarkdown_1.11 segmented_0.5-3.0 Rtsne_0.15
## [142] base64enc_0.1-3
print(paste("Seurat ", packageVersion("Seurat")))## [1] "Seurat 2.3.4"
Seurat has several tests for differential expression which can be set with the test.use parameter (see the DE vignette for details). For example, the ROC test returns the ‘classification power’ for any individual marker (ranging from 0 - random, to 1 - perfect).
Shown here: Biomarkers of each cluster vs. all other clusters.
# Limit to only top variable genes?:
# Set arg 'only.pos=F' to capture negative biomarkers
# DAT <- SetIdent(DAT, ident.use = "post_clustering")
DAT.markers <- FindAllMarkers(object = DAT, min.pct = 0.25, thresh.use = 0.25, only.pos = F,
test.use = "wilcox") # DESeq2## Cell group 2 is empty - no cells with identity class
## Warning in FindAllMarkers(object = DAT, min.pct = 0.25, thresh.use =
## 0.25, : No DE genes identified.
DAT.markers <- DAT.markers %>% mutate(FC = 2^avg_logFC)## Error in mutate_impl(.data, dots): Evaluation error: object 'avg_logFC' not found.
DAT.markers.sig <- DAT.markers %>% subset(p_val_adj<=0.05) ## Error in eval(e, x, parent.frame()): object 'p_val_adj' not found
markers.summary <- DAT.markers.sig %>% group_by(cluster) %>% tally()## Error in eval(lhs, parent, parent): object 'DAT.markers.sig' not found
# markers.summary <- base::merge(DAT.markers.sig %>% group_by(cluster) %>% tally(),
# DAT.markers %>% group_by(cluster) %>% summarise(mean(avg_logFC)),
# by="cluster" )
createDT(markers.summary, caption = "Number of DEGs and Mean logFC per Cluster")## Error in crosstalk::is.SharedData(data): object 'markers.summary' not found
createDT(DAT.markers, caption = paste("All Biomarkers: All Clusters"))topNum = 5
topBiomarkers <- DAT.markers %>% group_by(cluster) %>% top_n(topNum, avg_logFC)## Error in grouped_df_impl(data, unname(vars), drop): Column `cluster` is unknown
createDT(DAT.markers, caption = paste("All Biomarkers: All Clusters"))getTopBiomarker <- function(DAT.markers, clusterID, topN=1){
df <-DAT.markers %>%
subset(p_val_adj<0.05 & cluster==as.character(clusterID)) %>%
arrange(desc(avg_logFC))
top_pct_markers <- df[1:topN,"gene"]
return(top_pct_markers)
}
# clust1_biomarkers <- getTopBiomarker(DAT.markers, clusterID=1, topN=2)
# clust2_biomarkers <- getTopBiomarker(DAT.markers, clusterID=2, topN=2)
### Plot biomarkers
plotBiomarkers <- function(DAT, biomarkers, cluster){
biomarkerPlots <- list()
for (marker in biomarkers){
p <- VlnPlot(object = DAT, features.plot = c(marker), y.log=T, return.plotlist=T)
biomarkerPlots[[marker]] <- p + ggplot2::aes(alpha=0.5) + xlab( "Cluster") + ylab( "Expression")
}
combinedPlot <- do.call(grid.arrange, c(biomarkerPlots, list(ncol=2, top=paste("Top DEG Biomarkers for Cluster",cluster))) )
# biomarkerPlots <- lapply(biomarkers, function(marker) {
# VlnPlot(object = DAT, features.plot = c(marker), y.log=T, return.plotlist=T) %>% + ggplot2::ggtitle(marker) %>% ggplotly()
# })
# return(subplot(biomarkerPlots) )
}
top1 <- DAT.markers %>% group_by(cluster) %>% top_n(1, avg_logFC) ## Error in grouped_df_impl(data, unname(vars), drop): Column `cluster` is unknown
nCols <- floor( sqrt(length(unique(top1$cluster))) ) ## Error in unique(top1$cluster): object 'top1' not found
figHeight <- nCols *7## Error in eval(expr, envir, enclos): object 'nCols' not found
# Plot top 2 biomarker genes for each
for (clust in unique(DAT.markers$cluster)){
cat('\n')
cat("### Cluster ",clust,"\n")
biomarkers <- getTopBiomarker(DAT.markers, clusterID=clust, topN=2)
plotBiomarkers(DAT, biomarkers, clust)
cat('\n')
} ##Construct the plot object
volcanoPlot <- function(DEG_df, caption="", topFC_labeled=5){
DEG_df$sig<- ifelse( DEG_df$p_val_adj<0.05 & DEG_df$avg_logFC<1.5, "p_val_adj<0.05",
ifelse( DEG_df$p_val_adj<0.05 & DEG_df$avg_logFC>1.5, "p_val_adj<0.05 & avg_logFC>1.5",
"p_val_adj>0.05"
))
DEG_df <- arrange(DEG_df, desc(sig))
yMax <- max(-log10(DEG_df$p_val_adj)) + max(-log10(DEG_df$p_val_adj))/3 #ifelse(max(-log10(DEG_df$p_val_adj))<45, 50, max(-log10(DEG_df$p_val_adj)) + 10)
vol <- ggplot(data=DEG_df, aes(x=avg_logFC, y= -log10(p_val_adj))) +
geom_point(alpha=0.5, size=3, aes(col=sig)) +
scale_color_manual(values=list("p_val_adj<0.05"="turquoise3",
"p_val_adj<0.05 & avg_logFC>1.5"="purple",
"p_val_adj>0.05" = "darkgray")) +
theme(legend.position = "none") +
xlab(expression(paste("Average ",log^{2},"(fold change)"))) +
ylab(expression(paste(-log^{10},"(p-value)"))) + xlim(-2,2) + ylim(0, yMax) +
## ggrepl labels
geom_text_repel(data= arrange(DEG_df, p_val_adj, desc(avg_logFC))[1:topFC_labeled,],
# filter(DEG_df, avg_logFC>=1.5)[1:10,],
aes(label=gene), color="black", alpha=.5,
segment.color="black", segment.alpha=.5
) +
# Lines
geom_vline(xintercept= -1.5,lty=4, lwd=.3, alpha=.5) +
geom_vline(xintercept= 1.5,lty=4, lwd=.3, alpha=.5) +
geom_hline(yintercept= -log10(0.05),lty=4, lwd=.3, alpha=.5) +
ggtitle(caption)
print(vol)
}
# Run plots
for (clust in unique(DAT.markers$cluster)){
cat('\n')
cat("### Cluster ",clust,": Volcano","\n")
cap <- paste("Cluster",clust,"DEG Table")
DEG_df <- subset(DAT.markers, cluster==as.character(clust)) %>% arrange(desc(avg_logFC))
volcanoPlot(DEG_df, caption = cap)
createDT_html(DEG_df, caption = cap)
cat('\n')
} for (clust in top1$cluster){
subClust <- subset( top1, cluster==clust)
cat('\n')
cat("### Cluster",clust,"\n")
cat( "Biomarker\n",subClust$gene)
results <- Seurat::FindGeneTerms(QueryGene = subClust$gene)
print(results) #parse_html_notebook(results)
cat('\n')
}## Error in eval(expr, envir, enclos): object 'top1' not found
fp <- FeaturePlot(object = DAT, features.plot = top1$gene, cols.use = c("grey", "purple"),
reduction.use = "tsne", nCol = nCols, do.return = T)## Error in FeaturePlot(object = DAT, features.plot = top1$gene, cols.use = c("grey", : object 'nCols' not found
top5 <- DAT.markers %>% group_by(cluster) %>% top_n(5, avg_logFC)## Error in grouped_df_impl(data, unname(vars), drop): Column `cluster` is unknown
# setting slim.col.label to TRUE will print just the cluster IDS instead of
# every cell name
DoHeatmap(object = DAT, genes.use = top5$gene, slim.col.label=T, remove.key=T) ## Error in SetIfNull(x = genes.use, default = rownames(x = data.use)): object 'top5' not found
# ifelse(interactive, p %>% ggplotly() %>% toWebGL() %>% print(), print(p))RidgePlot(DAT, features.plot = top1$gene, nCol = nCols, do.sort = F)## Error in RidgePlot(DAT, features.plot = top1$gene, nCol = nCols, do.sort = F): object 'nCols' not found
Visualize biomarker expression for each cluster, by disease
top2 <- DAT.markers %>% group_by(cluster) %>% top_n(2, avg_logFC)## Error in grouped_df_impl(data, unname(vars), drop): Column `cluster` is unknown
sdp <- SplitDotPlotGG(DAT, genes.plot = top2$gene, cols.use = c("blue","red"),
x.lab.rot = T, plot.legend = T, dot.scale = 8, do.return = T, grouping.var = "dx")## Error in vars.all %in% rownames(object@data): object 'top2' not found
The following plots show the absolute expression of each biomarker, as opposed to avg_logFC which is dependent on the expression patterns of other cell types being compared.
markerList <- c("CD14", "FCGR3A")
get_markerDF <- function(DAT, markerList, meta_vars =c("barcode", "dx", "mut","post_clustering", "percent.mito","nGene", "nUMI")){
exp <- DAT@scale.data %>% data.frame()
marker.matrix <- exp[row.names(exp) %in% markerList, ]
marker.matrix$Gene <- row.names(marker.matrix)
markerMelt <- reshape2:::melt.data.frame(marker.matrix, id.vars = "Gene", variable.name = "Cell",value.name = "Expression")
metaSelect <- DAT@meta.data[,meta_vars]
markerDF <- merge(markerMelt,metaSelect, by.x="Cell", by.y="barcode")
return(markerDF)
}
markerDF <- get_markerDF(DAT, markerList)
createDT(markerDF, caption = "Known Marker Expression")## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Explore expression differences between groups
marker_vs_metadata <- function(markerDF, meta_var){
# Create title from ANOVA summary
ANOVAtitle <- function(markerDF, marker){
nTests <- length(unique(markerDF$Gene))
res <- anova(lm(data = subset(markerDF, Gene==marker),
formula = Expression ~ eval(parse(text=meta_var))))
title <-paste(paste("ANOVA (",marker, " vs. ",meta_var, ")", sep=""),
": p=",round(res$`Pr(>F)`,3),
", F=",round(res$`F value`,3),
ifelse(res$`Pr(>F)`<.05/nTests,"(Significant**)",
"(Non-significant)") )
}
title = ""
for (marker in unique(markerDF$Gene) ){
cat(marker)
title <- paste(title, "\n", ANOVAtitle(markerDF, marker))
}
ggplot(markerDF, aes(x=eval(parse(text=meta_var)), y=Expression, fill= Gene)) +
geom_violin() +
geom_point( position=position_jitterdodge(jitter.width = .2, dodge.width = .9 ), alpha=0.6, color="turquoise3") +
labs(title = title, x=meta_var) +
theme(plot.title = element_text( size=10)) +
scale_fill_manual(values=c("brown", "slategray"))
}marker_vs_metadata(markerDF, "dx")## FCGR3ACD14
marker_vs_metadata(markerDF, "mut")## FCGR3ACD14
# load("DAT.R")
# Convert Seurat objt to CDS object
mDAT <- monocle::importCDS(DAT, import_all = T)
# generate size factors for normalization later
mDAT <- estimateSizeFactors(mDAT)
# Get pre-trained PBMC classifer
load("./Garnet/hsPBMC") # Download from: https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC
# Get feature genes for each cell type
feature_genes <- get_feature_genes(classifier = hsPBMC,
node = "root",
db = org.Hs.eg.db,
convert_ids = T)
head(feature_genes)
mDAT <- classify_cells(mDAT, hsPBMC,
db = org.Hs.eg.db,
cluster_extend = TRUE,
cds_gene_id_type = "SYMBOL")
head(pData(mDAT))
table(pData(mDAT)$cell_type)
table(pData(mDAT)$cluster_ext_type)
# Run tSNE: Plot Clusters and Cell Types
mDAT <- reduceDimension(mDAT, max_components = 3, reduction_method = "tSNE")
commonGeoms <- labs(x="tSNE1",y="tSNE2")
plot_grid(nrow = 2,
qplot(data = pData(mDAT), mDAT@reducedDimA[1,], mDAT@reducedDimA[2,], color = cell_type) + theme_bw() + commonGeoms,
qplot(data = pData(mDAT), mDAT@reducedDimA[1,], mDAT@reducedDimA[2,], color = cluster_ext_type) + theme_bw() + commonGeoms
)
# Unsupervised Clustering
mDAT <- clusterCells(mDAT, num_clusters = 5)
pData(mDAT)
plot_cell_clusters(mDAT, 1, 2, color = "Cluster", markers = c("CD14", "FCGR3A"))
plot_cell_clusters(mDAT, 1, 2, color = "Cluster") + facet_wrap(~dx)
plot_cell_clusters(mDAT, 1, 2, color = "Cluster") + facet_wrap(~mut)
plot_cell_clusters(mDAT, 1, 2, color = "Cluster") + facet_wrap(~cell_type)
DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type","Cluster")])WARNING: Very computationally expensive!
mDAT <- reduceDimension(mDAT, max_components = 2, method = 'DDRTree')
mDAT <- orderCells(mDAT)
plot_cell_trajectory(mDAT, color_by = "dx")
plot_cell_trajectory(mDAT, color_by = "cell_type")
plot_cell_trajectory(HSMM_myo, color_by = "cell_type") +
facet_wrap(~mut, nrow = 2)markerDF <- get_markerDF(DAT, markerList,
meta_vars =c("barcode", "dx", "mut","ID","post_clustering", "percent.mito","nGene", "nUMI" )) #cell_type
Spectral <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(length(unique(DAT@meta.data$mut)), "Spectral"))
# DAT <- DoKMeans(DAT, k.genes = 3)
# KMeansHeatmap(DAT)
if (T==F){
# Spectral <- heatmaply::Spectral(length(unique(DAT@meta.data$mut)))
markerMelt <- reshape2::acast(markerDF, Cell~Gene, value.var="Expression", fun.aggregate = mean, drop = F, fill = 0)
heatmaply::heatmaply(markerMelt, key.title="Expression",#plot_method= "ggplot",
k_row = dim(markerMelt)[2], dendrogram = "row",
showticklabels = c(T, F), xlab = "Known Markers", ylab = "Cells", column_text_angle = 45,
row_side_colors = DAT@meta.data[,c("dx","mut")], #cell_type
row_side_palette = Spectral
) %>% colorbar(tickfont = list(size = 12), titlefont = list(size = 14), which = 2) %>%
colorbar(tickfont = list(size = 12), titlefont = list(size = 14), which = 1)
}else{
# markerDF_sub <-subset(markerDF, Gene==markerList[1])
# var_to_colors(markerDF_sub, "post_clustering")
# library(pheatmap)
# pheatmap(markerMelt, annotation_row = markerDF_sub[c("dx","mut","cell_type")])
# pheatmap(markerMelt, kmeans_k = NA, annotation_row = markerDF_sub[c("dx","mut","cell_type")],
# cluster_cols = F, cutree_rows = length(unique(markerDF$post_clustering)), angle_col=45 )
library(RColorBrewer)
var_to_colors <- function(markerDF, metaVar){
colors <- brewer.pal(length(unique(markerDF[metaVar]) ), "Dark2")
sample(colors, length(unique(markerDF[metaVar])), replace = TRUE, prob = NULL)
# metaColors <- colors[ subset(markerDF, Gene==markerList[1])[metaVar][,1] %>% as.factor() ]
return(metaColors)
}
# library(GMD)
# myCols = cbind(var_to_colors(markerDF, "dx"), var_to_colors(markerDF, "mut"))
# rlab=t(cbind(
# var_to_colors(markerDF, "post_clustering"),
# var_to_colors(markerDF, "dx")
# ))
# heatmap.2(marker.matrix, key.title="Expression", col = viridis(300), trace="none",Colv = F, Rowv = F,
# labRow = F, xlab = "Biomarker", ylab="Cell", cexCol=1, RowSideColors = var_to_colors(markerDF, "post_clustering")
# )
# heatmap.3(markerMelt, dendrogram = 'row', kr = length(unique(markerDF)), labRow = F,
# xlab = "Biomarker", ylab = "Cell", RowSideColors = rlab, RowSideColorsSize=2 )
markerDF <- markerDF %>%
mutate_at(vars(post_clustering, dx, mut, ID), as.factor) %>% #cell_type
mutate(Cluster = post_clustering) %>%
arrange(post_clustering)
# markerMelt <- reshape2::acast(markerDF, Cell~Gene, value.var="Expression", fun.aggregate = mean, drop = F, fill = 0)
markerMelt <- dcast(markerDF, Cell + post_clustering + dx + mut + ID ~ Gene, #+ cell_type
fun.aggregate = mean, value.var = "Expression") %>% arrange(post_clustering)
marker.matrix <- markerMelt[markerList] %>%as.matrix()
row.names(marker.matrix) <- markerMelt$Cell
ha = HeatmapAnnotation(df = markerDF[c("dx","mut","ID","post_clustering")], which = "row") #cell_type
ComplexHeatmap::Heatmap(marker.matrix, col=viridis(300), column_title = "Biomarker", row_title = "Cell",
row_dend_reorder = F,show_row_names = F, show_column_dend = F,show_row_dend =T,
cluster_rows = T, column_title_side = "bottom",km = length(unique(markerMelt$post_clustering))) + ha
} markerDF <- markerDF %>% mutate(Cluster = post_clustering)
# Show mean exp for each marker
avgMarker <- markerDF %>% group_by(Gene, Cluster) %>% summarise(meanExp = mean(Expression))
p <- ggplot(data = avgMarker, aes(x=Gene, y=Cluster, fill=meanExp)) %>% + geom_tile() + scale_fill_viridis()
p# ifelse(interactive, p %>% ggplotly() %>% toWebGL() %>% print(), print(p))# Show mean exp for each marker
avgMarker <- markerDF %>% group_by(Gene, dx, Cluster) %>% summarise(meanExp = mean(Expression))
p <- ggplot(data = avgMarker, aes(x=Gene, y=dx, fill=meanExp)) %>% + geom_tile() + scale_fill_viridis()
p # ifelse(interactive, p %>% ggplotly() %>% toWebGL() %>% print(), print(p))ggplot(data = markerDF, aes(x=Cluster, y=Expression, fill=Gene)) %>%
+ geom_boxplot(alpha=0.5) %>% + scale_fill_manual(values=c("purple", "turquoise")) #, results = 'hide', fig.show='hide'
expressionTSNE <- function(DAT, marker, colors=c("grey", "red")){
FeaturePlot(object = DAT, features.plot = marker, cols.use = colors,
reduction.use = "tsne", nCol=2, do.return = T, dark.theme = T)[[1]]
# p <- ifelse(interactive, p %>% ggplotly() %>% toWebGL(), print(p))
}
plot_grid(expressionTSNE(DAT, markerList[1]),
expressionTSNE(DAT, markerList[2], colors=c("grey", "green")))## Error in GetDimReduction(object = object, reduction.type = reduction.use, : tsne dimensional reduction has not been computed
current.cluster.ids <- unique(DAT.markers$cluster) #c(0, 1, 2, 3, 4, 5, 6, 7)
top1 <- DAT.markers %>% group_by(cluster) %>% top_n(1, avg_logFC)## Error in grouped_df_impl(data, unname(vars), drop): Column `cluster` is unknown
new.cluster.ids <- top1$gene #c("CD4 T cells", "CD14+ Monocytes", "B cells", "CD8 T cells", "FCGR3A+ Monocytes", "NK cells", "Dendritic cells", "Megakaryocytes")## Error in eval(expr, envir, enclos): object 'top1' not found
DAT@ident <- plyr::mapvalues(x = DAT@ident, from = current.cluster.ids, to = new.cluster.ids)## Error in plyr::mapvalues(x = DAT@ident, from = current.cluster.ids, to = new.cluster.ids): object 'new.cluster.ids' not found
TSNEPlot(object=DAT, do.label=T, pt.size=0.5, do.return=T) ## Error in GetDimReduction(object = object, reduction.type = reduction.use, : tsne dimensional reduction has not been computed
# ifelse(interactive, p %>% ggplotly() %>% toWebGL() %>% print(), print(p)) # Available DGE methods:
## "wilcox", "bimod", "roc", "t", "tobit", "poisson", "negbinom", "MAST", "DESeq2"
runDGE <- function(DAT, meta_var, group1, group2, test.use="wilcox"){
#print(paste("DGE_allCells",meta_var,sep="_"))
DAT <- SetAllIdent(DAT, id = meta_var)
DAT <- StashIdent(DAT, save.name = meta_var)
DEGs <- FindMarkers(DAT, ident.1=group1, ident.2=group2, test.use=test.use, only.pos = F)
DEGs$gene <- row.names(DEGs)
cap <- paste("DEGs:\n",group1, "vs.", group2)
createDT_html(DEG_df, caption = cap)
volcanoPlot(DEG_df, caption = cap)
DAT <- SetAllIdent(DAT, id = "post_clustering")
return(DEGs)
}DEG_df <-runDGE(DAT, "dx", group1 = "PD", group2="control")## Error in crosstalk::is.SharedData(data): object 'DEG_df' not found
DEG_df <-runDGE(DAT, "mut", "LRRK2", "PD")## Error in createDT_html(DEG_df, caption = cap): object 'DEG_df' not found
DEG_df <-runDGE(DAT, "cell_type", "Monocytes", "Dendritic cells") DGE_within_clusters <- function(DAT, meta_var, group1, group2){
for (clust in unique(DAT@meta.data$post_clustering)){
# Subset cells by cluster
cat('\n')
cat("### ",paste("Cluster ",clust,": ",group1," vs. ", group2, sep="") , "\n")
DAT_clustSub <- Seurat::SubsetData(DAT, accept.value = clust, subset.raw = T)
DEG_df <-runDGE(DAT_clustSub, meta_var, group1, group2 )
cat('\n')
}
}DGE_within_clusters(DAT, "dx", "PD", "control") ## Error in createDT_html(DEG_df, caption = cap): object 'DEG_df' not found
DGE_within_clusters(DAT, "mut", "LRRK2", "PD")## Error in createDT_html(DEG_df, caption = cap): object 'DEG_df' not found
DGE_within_clusters(DAT, "cell_type", "Monocytes", "Dendritic cells")
DAT@meta.data$cell_type %>% table()If you perturb some of our parameter choices above (for example, setting resolution=0.8 or changing the number of PCs), you might see the CD4 T cells subdivide into two groups. You can explore this subdivision to find markers separating the two T cell subsets. However, before reclustering (which will overwrite object@ident), we can stash our renamed identities to be easily recovered later.
new_resolution <- 3.0
orig_resolution <- paste("resolution",resolution,sep="_")
DAT <- StashIdent(object = DAT, save.name = orig_resolution)
## Warning in BuildSNN(object = object, genes.use = genes.use, reduction.type
## = reduction.type, : Build parameters exactly match those of already
## computed and stored SNN. To force recalculation, set force.recalc to TRUE.
DAT <- FindClusters(object = DAT, reduction.type = "pca", dims.use = 1:10,
resolution = new_resolution, print.output = F)
DAT <- StashIdent(object = DAT, save.name = "resolution_3.0")
plot1 <- TSNEPlot(object = DAT, do.return = TRUE, no.legend = TRUE, do.label = TRUE, label.size=labSize)
plot2 <- TSNEPlot(object = DAT, do.return = TRUE, group.by = "post_clustering",
no.legend = TRUE, do.label = TRUE, label.size=labSize)
plot_grid(plot1, plot2)res3.0_markers <- FindAllMarkers(object = DAT, min.pct = 0.25, thresh.use = 0.25, only.pos = F, test.use = "wilcox")
top1_res3.0 <- res3.0_markers %>% group_by(cluster) %>% top_n(1, avg_logFC)
FeaturePlot(object = DAT, features.plot = top1_res3.0$gene, cols.use = c("green", "blue"))
# Set back to orig
DAT <- SetAllIdent(object = DAT, id = orig_resolution) save.image(file.path(resultsPath, "scRNAseq_results.RData"))